
/*-----------------------------------------------*
Name: Haqdarshak Pilot Intervention-Cleaning
Date Created: 30 July, 2018
Date Last Modified: 28 April, 2021
Created by: Aaron Berman and modified by Daniela Paz and Saumya Mathur on Nov.13/2020  
Description: Clean raw data to create an intermediate scheme amounts file
*-------------------------------------------------*/



*..................................................
**Install any user-written commands that cannot be found on this machine
foreach pkg in mdesc nmissing veracrypt {
	// -which- exits with return code 111 when the command is not found
	capture which `pkg'
	if _rc == 111 {
		ssc install `pkg'
	}
}
*..................................................
// Session setup: empty memory, fix the interpreter version, disable paging,
// close any log left open, and turn -pause- off so the pause lines below are skipped
clear all
version 12.0
set more off
capture log close
pause off



//open log 
*cd "$logs"
*log using "cleaning_scheme_amounts.smcl", replace



*********Tracking sheets
	// Purpose: turn the tracking sheets into one row per household x scheme
	// and store the result in the tempfile schemes_all_main.
	cd "$pilot"
	use "pilot_intervention_tracking_sheets_all", clear 
	 
	*------*
	//merge in completed applications data 
	merge m:1 member_id scheme_id using "pilot_intervention_completed_apps.dta"
	tab _merge
	//using-only rows have no tracking-sheet record; there should be 2 of them
	drop if _merge == 2
	//after the drop only _merge 1 and 3 remain, so a row counts as an application
	//exactly when it matched a completed application
	gen applied_scheme = (_merge == 3)

	//collapse by household 
	//hh_price is the modal offered price within household x scheme (highest mode on ties)
	bysort household_id scheme_id: egen hh_price = mode(scheme_offered_price), maxmode
	collapse (sum) num_agree_hh=agreed_scheme_amount num_apply_hh=applied_scheme (count) num_offer_hh=row (mean) hh_scheme_price=hh_price scheme_normal_price, by(household_id scheme_id)
	//discount is the share of the normal price that was waived (1 when the household price is 0)
	gen discount = 1 - (hh_scheme_price / scheme_normal_price)

	//generate relevant variables 
	//each flag is 1 when the corresponding household total is positive and non-missing
	gen atleast1_offered_hh = (num_offer_hh > 0 & !missing(num_offer_hh))
	gen atleast1_agreed_hh = (num_agree_hh > 0 & !missing(num_agree_hh))
	gen atleast1_applied_hh = (num_apply_hh > 0 & !missing(num_apply_hh))

	pause
	tostring household_id, replace


	//save tempfile for later use
	//the id is kept twice: household_id_string (string) and household_id (destring-ed copy)
	tempfile schemes_all_main
	ren household_id household_id_string
	destring household_id_string, gen(household_id)
	save `schemes_all_main', replace 
	pause


	//open cases data and reduce it to one row per household
	cd "$pilot"
	use "pilot_intervention_citizen_cumulative_all", clear 
	//a stable sort preserves file order within household, so the retained row is
	//the first one in the file on every run (plain -bysort- orders ties arbitrarily)
	sort household_id, stable
	by household_id: keep if _n == 1
	tempfile cases_all 
	save `cases_all', replace 


	//merge with tracking sheets 
	use `schemes_all_main', clear 
	//NOTE(review): the cases file is later read for a variable named discount, which
	//the master data also has. Matched rows keep the master value; using-only rows
	//(merge_cases == 2) would carry the cases-file value. Confirm this is intended.
	merge m:1 household_id using `cases_all', gen(merge_cases)
	tab merge_cases 
	pause


	//save tempfile for later use
	//a fresh tempfile name is requested, so the merged data go to a new file
	tempfile schemes_all_main
	save `schemes_all_main', replace 


	***REGRESSIONS***
	//start with baseline household data; it is loaded once and feeds two lookup tempfiles
	use "$pilot/hh_data_treatment_b", clear 

	//lookup 1: baseline household characteristics
	preserve
		//disability: row sum over the disabled_* variables, then an any-disability flag
		egen num_disabled = rowtotal(disabled_*)
		gen any_disability_hh = (num_disabled > 0) if !missing(num_disabled)

		//1 when member_gender_1 equals 1; every other value, including missing, gives 0
		gen member_male_1 = (member_gender_1 == 1)

		keep household_id any_disability_hh above_48000 enter_annual_income caste* applied_for_govt_scheme member_male_1
		//the id is stored as a string in this file
		tostring household_id, replace
		tempfile baseline_characteristics
		save `baseline_characteristics', replace 
	restore

	//lookup 2: village & stratum designations, built from the untouched baseline data
	keep household_id village_id stratum
	tostring household_id, replace
	tempfile village_strata
	save `village_strata', replace 


	**********************************************************
	//household attributes that the individual-level file needs
	use "$pilot/hh_data_treatment_b", clear 
	keep household_id annual_income housekind_kuccha housekindobserve_kuccha enter_annual_income
	tempfile hh_attributes
	save "`hh_attributes'"

	//individual-level data: attach the household attributes
	use "$pilot/individual_data_treatment_b", clear 
	merge m:1 household_id using "`hh_attributes'"
	//stop unless every row on both sides found a match
	assert _merge == 3
	drop _merge

	** drop rows where both age and gender are missing
	drop if missing(enter_years_) & missing(member_gender_)


	** 3g age
	rename enter_years_ age
	label var age "Age"

	** check age of first respondent: member 1 must have a recorded age of at least 18
	tab age if member_num == "1", m
	assert age >= 18 & !missing(age) if member_num == "1"
	sum age if member_num == "1", d

	** 3h gender
	** code 1 gives male = 1, code 2 gives male = 0, any other code stays missing
	gen male = 1 if member_gender_ == 1
	replace male = 0 if member_gender_ == 2
	tab male, m
	label var male "Male"

	** 3i literacy
	** note: a missing read_write_ value is coded 0 here
	gen literate = read_write_ == 1
	tab literate, m
	label var literate "Can read or write"

	** 3k enrolled in school
	rename enrolled_in_school_ enrolled_in_school
	tab enrolled_in_school, m
	label var enrolled_in_school "Currently enrolled in school or training"

	** 3l marital status
	rename member_marital_status_ marital_status

	** 3m occupation
	rename occupation_ occupation

	** 3n income in last month

	** 3o hours worked last week

	** 3p have documents

	** 3q bank account

	** 3r disabled
	rename disabled_ disabled
	tab disabled, m
	label var disabled "Disabled"

	** 3s disability percentage

	** 3t adopted orphan
	rename adopted_orphan_ adopted_orphan

	** 3u mother separated/divorced/widowed/remarried widow
	rename mother_widowed_ mother_widowed

	** 3v mother or father disabled
	rename mother_father_disabled_ mother_father_disabled


	****************
	** code up eligibilities for top 8 schemes in Rajasthan
	** each elig_ variable is built in one step: 1 when the rule holds, 0 otherwise
	****************

	** make sure not missing key variables
	assert !missing(male)
	*assert !missing(age)

	** 1: male == 0 and age 10 or under
	gen elig_sukanya = (age <= 10 & !missing(age) & male == 0)
	tab elig_sukanya, m

	** 2: set to 1 for every row
	gen elig_postoffice = 1
	tab elig_postoffice, m

	** 3: age 18 to 60 and occupation code 26 through 37
	gen elig_laborcard = (inrange(age, 18, 60) & inlist(occupation, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))
	tab elig_laborcard, m

	** 4: adult with housekind_kuccha equal to 1
	** not done
	gen elig_awas_yojana = (age >= 18 & !missing(age) & housekind_kuccha == 1)
	tab elig_awas_yojana, m

	** 5: income below 48000 and age at least 58 (male == 1) or at least 55 (male == 0)
	gen elig_oldage_pension = (!missing(age) & enter_annual_income < 48000 & ((male == 1 & age >= 58) | (male == 0 & age >= 55)))
	tab elig_oldage_pension, m

	** 6
	gen elig_palanhaar = . // this was too sensitive, so we did not ask these questions
	tab elig_palanhaar, m

	** 7: male == 0, adult, income below 48000, marital status code 3 to 6
	gen elig_widow_pension = (age >= 18 & !missing(age) & male == 0 & enter_annual_income < 48000 & inlist(marital_status,3,4,5,6))
	tab elig_widow_pension, m

	** 8: age 18 to 40
	gen elig_atalpension = inrange(age, 18, 40)
	tab elig_atalpension, m


	****************
	** code up whether individual is eligible for scheme and receiving scheme
	****************

	** strip the trailing underscore from the has_ variables
	rename has_*_ has_*

	local j "sukanya postoffice laborcard oldage_pension widow_pension awas_yojana atalpension"
	foreach x of local j {

		** among eligible rows: 1 when has_ equals 1, else 0; missing when not eligible
		gen has_elig_`x' = (has_`x' == 1) if elig_`x' == 1

		** among eligible rows: 1 when has_ equals 0, else 0; missing when not eligible
		gen doesnt_have_elig_`x' = (has_`x' == 0) if elig_`x' == 1

		** among ineligible rows: 1 when has_ equals 1, else 0; missing when eligible
		gen has_not_elig_`x' = (has_`x' == 1) if elig_`x' == 0

		** 1 when ineligible, 0 when eligible, missing for any other elig_ value
		gen not_elig_`x' = (elig_`x' == 0) if inlist(elig_`x', 0, 1)

		** exactly one of has_elig_ and has_not_elig_ must be missing on every row
		assert missing(has_elig_`x') + missing(has_not_elig_`x') == 1
	}

	//count eligibility and store in locals 
	//NOTE(review): laborcard is not in this list although it is coded above -- confirm intended
	local j "sukanya postoffice oldage_pension widow_pension awas_yojana atalpension"
	foreach x of local j {
		//individuals who are eligible and have the scheme
		count if has_elig_`x' == 1
		local has_elig_`x' = r(N)
		//individuals who are eligible and do not have the scheme
		count if doesnt_have_elig_`x' == 1
		local doesnt_have_elig_`x' = r(N)
	}
	*pause 

	//count *household* eligibility and store in locals 
	//the individual data are preserved, summed to household level, and restored afterwards
	preserve
	collapse (sum) has_elig_* doesnt_have_elig_* has_not_elig_*, by(household_id)
	local j "sukanya postoffice oldage_pension widow_pension awas_yojana atalpension"
	foreach x of local j {
		//households with at least one such member
		count if has_elig_`x' >= 1
		local h_has_elig_`x' = r(N)
		count if doesnt_have_elig_`x' >= 1
		local h_not_have_elig_`x' = r(N)
	}
	restore


	*************
	//member_id is the household id (converted to string) followed by the member number
	tostring household_id, replace
	generate member_id = household_id + member_num

	//keep only necessary variables and store them in a tempfile
	keep member_id elig_* has_elig_* doesnt_have_elig_* has_not_elig_*
	tempfile individual_elig
	save `individual_elig', replace 


	//use household-level data from previous section 
	use `schemes_all_main', clear 
	*tostring household_id, replace

	//member counts are merged first; household_id is converted to string only
	//afterwards, matching the string id stored in the two baseline tempfiles
	merge m:1 household_id using "$intermediate_data/household_member_counts"
	pause
	tostring household_id, replace

	//_merge == 1 means erroneous or missing HHID
	//_merge == 2 means baseline HH has not yet been screened 
	keep if _merge == 3
	drop _merge

	//same rule for each baseline lookup: keep matched rows only
	foreach lookup in baseline_characteristics village_strata {
		merge m:1 household_id using ``lookup''
		keep if _merge == 3
		drop _merge
	}




	//generate treatment groups 
	//discount here is a share (1, .5, 0); rows with any other value get 0 in all three
	gen treat_100 = (discount == 1)
	gen treat_50 = (discount == .5)
	gen treat_0 = (discount == 0)


	***ADDED OCTOBER 16: MERGE IN SCREENING TREATMENT STATUS AS WELL***
	tempfile all_schemes_no_screening
	save `all_schemes_no_screening', replace 

	cd "$pilot"
	use "pilot_intervention_citizen_cumulative_all.dta", clear 

	//one obs per household 
	//a stable sort preserves file order within household, so the row whose discount
	//is used below is the first one in the file on every run
	//(plain -bysort- orders ties arbitrarily)
	sort household_id, stable
	by household_id: keep if _n == 1
		
	//code discount status 
	//in this file discount is compared against 100, 50 and 0
	gen screen_100 = (discount == 100)
	gen screen_50 = (discount == 50)
	gen screen_0 = (discount == 0)
	keep household_id screen_0 screen_50 screen_100 

	tempfile hh_screening_prices
	save `hh_screening_prices', replace 

	use `all_schemes_no_screening', clear 
	//household_id was converted to string earlier; convert back before merging on it
	destring household_id, replace
	merge m:1 household_id using `hh_screening_prices'
	pause
	//keep only rows found in both the scheme data and the screening file
	keep if _merge == 3
	drop _merge 
	***************************************************************

	//bottom-code zero income: zeros are replaced by the smallest non-zero value
	//NOTE(review): a negative income would become that minimum and make the log missing -- confirm none exist
	quietly summarize enter_annual_income if enter_annual_income != 0, meanonly
	replace enter_annual_income = r(min) if enter_annual_income == 0

	//generate log income 
	generate log_income = ln(enter_annual_income)

	//generate dummies for village and stratum fixed effects
	tabulate village_id, generate(d_village_)
	tabulate stratum, generate(d_stratum_)
	***********************************************************************
	
	//write the finished analysis file
	save "$intermediate_data/screen_amounts", replace
	



